***Panel for earnings above 100USD***
* Session setup for the whole do-file.
* Run the commands below under Stata 17.0 syntax and behaviour.
version 17.0
* Start from an empty session (data, programs, stored results).
clear all
* Do not pause output at the --more-- prompt.
set more off
* Close a log left open by an earlier run; capture suppresses the error when none is open.
capture log close
* All relative paths used below (the use\ folder) are resolved from this project directory.
cd "P:\2018\186"

*1) Wide-format dataset: one row per person, one earnings column per register year
* Base population: birth records with known sex and known birth year, born 1998 or earlier
use idnr woman yob if yob!=. & woman!=. & yob<=1998 using "use\birthrecords", replace

* Earnings are attached source by source. Every merge keeps all persons of the
* base population (unmatched master rows and matched rows) and never adds persons
* that appear only in the earnings file.

* IoT years
foreach yr of numlist 1968 1971 1973 1976 1979 1982 {
	sort idnr
	merge 1:1 idnr using "use\iot`yr'", keepusing(earn`yr') keep(master match) nogenerate
}
* FoB years 1970, 1975 and 1980
foreach yr of numlist 1970 1975 1980 {
	sort idnr
	merge 1:1 idnr using "use\fob`yr'", keepusing(earn`yr') keep(master match) nogenerate
}
* LOUISE years 1985-2019
* yob is requested from the using file as well, but without the update option the
* value already in memory (from the birth records) is the one that is kept.
forvalues yr = 1985/2019 {
	sort idnr
	merge 1:1 idnr using "use\louise`yr'", keepusing(yob earn`yr') keep(master match) nogenerate
}

* Drop persons without a single observed earnings value.
* The range earn1968-earn2019 follows storage order; earn1968 was merged first and
* earn2019 last, so the range spans every earnings column.
egen n_obs_earn = rownonmiss(earn1968-earn2019)
drop if n_obs_earn == 0
drop n_obs_earn
*Real earnings in 2016 prices
* The table holds pairs: register year, price index value for that year
* (presumably the Swedish CPI - confirm the series against the source).
* Nominal earnings are scaled by 4623 (the table value for 2016) over the index
* of the earnings year and rounded to whole units.
local cpi_by_year ///
	1968  551   1970  605   1971  650   1973  735   1975  887   ///
	1976  979   1979 1286   1980 1461   1982 1778   1985 2246   ///
	1986 2341   1987 2440   1988 2582   1989 2748   1990 3036   ///
	1991 3319   1992 3395   1993 3553   1994 3631   1995 3723   ///
	1996 3740   1997 3760   1998 3754   1999 3772   2000 3809   ///
	2001 3902   2002 3986   2003 4063   2004 4078   2005 4097   ///
	2006 4153   2007 4243   2008 4390   2009 4378   2010 4434   ///
	2011 4550   2012 4590   2013 4588   2014 4580   2015 4578   ///
	2016 4623   2017 4706   2018 4798   2019 4884
* Consume the table two tokens at a time until it is empty
while `: list sizeof cpi_by_year' > 0 {
	gettoken yr  cpi_by_year : cpi_by_year
	gettoken idx cpi_by_year : cpi_by_year
	replace earn`yr' = round(earn`yr'*(4623/`idx'), 1)
}
*Earnings floor: 100 USD in 1967 dollars
* Exchange rate 1967: 5.18 SEK per USD
* CPI index in 1967: 540 (2016: 4623)
* 100 USD in 1967 is therefore 100*5.18*4623/540 = 4435 SEK in 2016 prices
* Earnings below the floor are set to missing, year by year
local floor_sek 4435
foreach yr of numlist 1968 1970 1971 1973 1975 1976 1979 1980 1982 1985/2019 {
	replace earn`yr' = . if earn`yr' < `floor_sek'
}
* Shrink storage types, sort by person id and write the wide file.
compress
sort idnr
* Put the id first, followed by every earnings column. The range follows storage
* order, where earn2019 is the last earnings column merged in. The upper bound used
* to be earn2016, which left woman and yob wedged between earn2016 and earn2017.
order idnr earn1968-earn2019
save "use\incwide5", replace

*2) Long-format panel: one row per person and register year
use "use\incwide5", replace
* The year suffix of each earn column becomes the new variable year
reshape long earn, i(idnr) j(year)
* Age reached during the calendar year
generate age = year - yob
* Keep person-years at ages 18 through 65 (both inclusive)
keep if inrange(age, 18, 65)
label variable year "Year"
label variable earn "Income"
label variable age  "Age"
label variable yob  "Year of birth"
compress
sort idnr year
save "use\incpanel5", replace

*3) Children's earnings at different age intervals
* Person-years with observed earnings at ages 29-41
use idnr yob year earn age woman if earn!=. & inrange(age, 29, 41) using "use\incpanel5", replace

* Age bands for the sub-averages earn1-earn4: k-th lower bound pairs with k-th upper bound
local band_lo 29 34 39 30
local band_hi 31 36 41 36
forvalues k = 1/4 {
	local a : word `k' of `band_lo'
	local b : word `k' of `band_hi'
	generate earn`k' = earn if inrange(age, `a', `b')
}

* Keep only individuals with at least 2 observed earnings at ages 29-41
bysort idnr: egen nearn = count(earn)
keep if nearn >= 2

* Number of observed earnings inside each band, per individual
forvalues k = 1/4 {
	bysort idnr: egen nearn`k' = count(earn`k')
}

* One row per individual: averages over the observed years
* (yob, woman and the counts are constant within person, so their mean is the value itself)
collapse (mean) earn earn1-earn4 yob woman nearn1-nearn4, by(idnr)

label variable earn "earnings age 29-41"
forvalues k = 1/4 {
	local a : word `k' of `band_lo'
	local b : word `k' of `band_hi'
	label variable earn`k'  "earnings age `a'-`b'"
	label variable nearn`k' "number of observed incomes age `a'-`b'"
}
label variable yob "year of birth"
compress
sort idnr
save "use\earn_children5", replace

*4) Parental earnings at different age intervals
* Person-years with observed earnings at ages 42-58
use idnr yob year earn age woman if earn!=. & inrange(age, 42, 58) using "use\incpanel5", replace

* Age bands for the sub-averages earn1-earn3: k-th lower bound pairs with k-th upper bound
local band_lo 42 46 52
local band_hi 48 52 58
forvalues k = 1/3 {
	local a : word `k' of `band_lo'
	local b : word `k' of `band_hi'
	generate earn`k' = earn if inrange(age, `a', `b')
}

* Keep only individuals with at least 2 observed earnings at ages 42-58
bysort idnr: egen nearn = count(earn)
keep if nearn >= 2

* Number of observed earnings inside each band, per individual
forvalues k = 1/3 {
	bysort idnr: egen nearn`k' = count(earn`k')
}

* One row per individual: averages over the observed years
collapse (mean) earn earn1-earn3 yob woman nearn1-nearn3, by(idnr)

label variable earn "earnings age 42-58"
forvalues k = 1/3 {
	local a : word `k' of `band_lo'
	local b : word `k' of `band_hi'
	label variable earn`k'  "earnings age `a'-`b'"
	label variable nearn`k' "number of observed incomes age `a'-`b'"
}
label variable yob "year of birth"
compress
sort idnr
save "use\earn_parents5", replace

********************************************************************************
*1) Parental earnings above 100 USD per child cohort: 5 years of (potential) earnings starting when the child is 12, adjusting for observation gaps in these years: 1969, 1972, 1974, 1977-78, 1981, and 1983-84
* For every child birth cohort 1952-1994 one file is written (earn52_5 ... earn94_5).
* Each file has one row per person in the panel with observed earnings inside the
* cohort's calendar-year window:
*   earn  = mean earnings over the observed years in the window
*   nearn = number of observed years in the window
*   obsy  = mean calendar year of those observations
* The files cover everyone in the panel, not only parents; they are matched to
* mothers and fathers by id further down in this do-file.
*
* Window table, four numbers per entry:
*   first cohort, last cohort (two-digit birth years), first year, last year.
* Cohorts sharing a window get identical files, so the panel is loaded and
* collapsed once per window and the result is saved under each cohort name.
local windows ///
	52 56 1968 1975 ///
	57 58 1969 1976 ///
	59 59 1971 1979 ///
	60 61 1972 1980 ///
	62 63 1974 1982 ///
	64 64 1976 1985 ///
	65 67 1977 1986 ///
	68 68 1980 1987 ///
	69 70 1981 1988 ///
	71 72 1983 1989
* From cohort 1973 on the registers are annual: the window is simply the years
* in which the child is 12 to 16.
forvalues c = 73/94 {
	local windows `windows' `c' `c' `=19`c'+12' `=19`c'+16'
}

* Consume the table four tokens at a time until it is empty
while `: list sizeof windows' > 0 {
	gettoken c_first windows : windows
	gettoken c_last  windows : windows
	gettoken y_first windows : windows
	gettoken y_last  windows : windows

	use if year>=`y_first' & year<=`y_last' & earn!=. using "use\incpanel5", replace
	bysort idnr: egen nearn=count(earn)
	collapse (mean) earn yob woman nearn year, by(idnr)
	rename year obsy
	sort idnr
	compress
	forvalues c = `c_first'/`c_last' {
		save "use\earn`c'_5", replace
	}
}

*2) Earnings panel
* Children born 1952-1994, person-years at ages 25-48 with observed earnings
use if yob>=1952 & yob<=1994 & age>=25 & age<=48 & earn!=. using "use\incpanel5", replace
rename earn earn_c

*2a) Adding parents
sort idnr
merge m:1 idnr using "use\multigen", keepusing(midnr fidnr myob fyob bcountry mbcountry fbcountry) keep(master match) nogenerate

*2b) Adding parental earnings
* The cohort files are keyed on idnr. The child's own id and earnings are parked
* under temporary names (t1, t2) while each parent's id in turn takes the name
* idnr and serves as merge key. Mothers are processed first, then fathers.
rename (idnr earn_c) (t1 t2)
foreach p in m f {
	rename `p'idnr idnr
	sort idnr
	* Cohort 1952 creates the three parental variables ...
	merge m:1 idnr using "use\earn52_5", keepusing(earn nearn obsy) keep(master match) nogenerate
	foreach v in earn nearn obsy {
		replace `v' = . if yob != 1952
	}
	rename (earn nearn obsy) (`p'earn_c `p'nearn_c `p'obsy_c)
	* ... and cohorts 1953-1994 fill them in for children born in the matching year
	forvalues c = 53/94 {
		merge m:1 idnr using "use\earn`c'_5", keepusing(earn nearn obsy) keep(master match) nogenerate
		replace `p'earn_c  = earn  if yob == 19`c'
		replace `p'nearn_c = nearn if yob == 19`c'
		replace `p'obsy_c  = obsy  if yob == 19`c'
		drop earn nearn obsy
	}
	* Give the parent's id its own name back
	rename idnr `p'idnr
}
* Restore the child's id and earnings
rename (t1 t2) (idnr earn_c)

label variable mearn_c  "mother's earnings above 100USD"
label variable fearn_c  "father's earnings above 100USD"
label variable mnearn_c "observations mother's earnings above 100USD"
label variable fnearn_c "observations father's earnings above 100USD"
label variable mobsy_c  "year mother's earnings observed above 100USD"
label variable fobsy_c  "year father's earnings observed above 100USD"
*2c) Adding education
* edu_max is keyed on idnr. Father, mother and child each take the name idnr in
* turn; the child's id waits under the name temp while the parents are merged.
rename idnr temp
foreach p in f m {
	rename `p'idnr idnr
	sort idnr
	merge m:1 idnr using "use\edu_max", keepusing(edlev edu) keep(master match) nogenerate
	* Store the merged values under the parent's prefix and restore the parent's id name
	rename (edlev edu idnr) (`p'edlev `p'edu `p'idnr)
}
* The child's own education keeps the unprefixed names edlev and edu
rename temp idnr
sort idnr
merge m:1 idnr using "use\edu_max", keepusing(edlev edu) keep(master match) nogenerate
*2d) Imposing restrictions
*i) Keep children for whom at least one parent is identified
keep if !missing(fidnr) | !missing(midnr)
*ii) Blank out parental income averages that rest on fewer than 2 observed incomes
foreach p in f m {
	replace `p'earn_c = . if !(`p'nearn_c >= 2 & `p'nearn_c != .)
}
*iii) Keep children with income for at least one parent
keep if fearn_c != . | mearn_c != .
*2e) Log-earnings for elasticity estimates
* Flags ms and fs: the mother's / father's average rests on at least 2 observed incomes
foreach p in m f {
	generate `p's = `p'nearn_c>=2 & `p'nearn_c!=.
}
* Combined parental measures; both row functions ignore a missing parent
egen fmearn_c = rowmean(fearn_c mearn_c)
egen fmmax_c  = rowmax(fearn_c mearn_c)
label variable fmearn_c "average parental earnings above 100USD"
label variable fmmax_c  "parental earnings max average above 100USD"
* Natural logarithms of the parental measures and of the child's earnings
foreach v in fmearn_c fmmax_c earn_c {
	generate l`v' = ln(`v')
}
* Single-parent logs only where that parent's flag is set
generate lfearn_c = ln(fearn_c) if fs==1
generate lmearn_c = ln(mearn_c) if ms==1
label variable lfmearn_c "log parental earnings above 100USD"
label variable lfmmax_c  "log parental max earnings above 100USD"
label variable learn_c   "log earnings above 100USD"
label variable lfearn_c  "log father's earnings above 100USD"
label variable lmearn_c  "log mother's earnings above 100USD"
*2f) Ranks for child and average parental earnings by (child's) cohort separately by gender (of child)
* For each measure x the variable px is a percentile rank on a 0-100 scale within
* the cell defined by child birth year and child gender:
* (rank - 1) / (non-missing count - 1) * 100.
* NOTE(review): ranks are computed over the rows of this person-year panel, so a
* child observed in more years contributes more rows to its cell - confirm intended.
tempvar cell_n cell_rank
foreach v in fmearn_c fmmax_c earn_c fearn_c mearn_c {
	bysort yob woman: egen `cell_n'    = count(`v')
	bysort yob woman: egen `cell_rank' = rank(`v')
	generate p`v' = (`cell_rank'-1)/(`cell_n'-1)*100
	drop `cell_n' `cell_rank'
}
* The helper flags are not part of the saved file
drop ms fs
compress
sort idnr year
save "use\swepanel_100usd", replace